{
  "cells": [
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "id": "3jm8RYrLqvzz"
      },
      "source": [
        "# CLIP Interrogator 2.4 by [@pharmapsychotic](https://twitter.com/pharmapsychotic) \n",
        "\n",
        "Want to figure out what a good prompt might be to create new images like an existing one? The CLIP Interrogator is here to get you answers!\n",
        "\n",
        "<br>\n",
        "\n",
        "For Stable Diffusion 1.X choose the **ViT-L** model and for Stable Diffusion 2.0+ choose the **ViT-H** CLIP Model.\n",
        "\n",
        "This version is specialized for producing nice prompts for use with Stable Diffusion and achieves higher alignment between generated text prompt and source image. You can try out the old [version 1](https://colab.research.google.com/github/pharmapsychotic/clip-interrogator/blob/v1/clip_interrogator.ipynb) to see how different CLIP models ranks terms. \n",
        "\n",
        "You can also run this on HuggingFace and Replicate<br>\n",
        "[![Generic badge](https://img.shields.io/badge/🤗-Open%20in%20Spaces-blue.svg)](https://huggingface.co/spaces/pharma/CLIP-Interrogator) [![Replicate](https://replicate.com/pharmapsychotic/clip-interrogator/badge)](https://replicate.com/pharmapsychotic/clip-interrogator)\n",
        "\n",
        "<br>\n",
        "\n",
        "If this notebook is helpful to you please consider buying me a coffee via [ko-fi](https://ko-fi.com/pharmapsychotic) or following me on [twitter](https://twitter.com/pharmapsychotic) for more cool Ai stuff. 🙂\n",
        "\n",
        "And if you're looking for more Ai art tools check out my [Ai generative art tools list](https://pharmapsychotic.com/tools.html).\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "id": "aP9FjmWxtLKJ"
      },
      "outputs": [],
      "source": [
        "#@title Check GPU\n",
        "!nvidia-smi -L"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "id": "xpPKQR40qvz2"
      },
      "outputs": [],
      "source": [
        "#@title Setup\n",
        "import os, subprocess\n",
        "\n",
        "def setup():\n",
        "    install_cmds = [\n",
        "        ['pip', 'install', 'gradio'],\n",
        "        ['pip', 'install', 'open_clip_torch'],\n",
        "        ['pip', 'install', 'clip-interrogator'],\n",
        "    ]\n",
        "    for cmd in install_cmds:\n",
        "        print(subprocess.run(cmd, stdout=subprocess.PIPE).stdout.decode('utf-8'))\n",
        "\n",
        "setup()\n",
        "\n",
        "\n",
        "caption_model_name = 'blip-large' #@param [\"blip-base\", \"blip-large\", \"git-large-coco\"]\n",
        "clip_model_name = 'ViT-L-14/openai' #@param [\"ViT-L-14/openai\", \"ViT-H-14/laion2b_s32b_b79k\"]\n",
        "\n",
        "import gradio as gr\n",
        "from clip_interrogator import Config, Interrogator\n",
        "\n",
        "config = Config()\n",
        "config.clip_model_name = clip_model_name\n",
        "config.caption_model_name = caption_model_name\n",
        "ci = Interrogator(config)\n",
        "\n",
        "def image_analysis(image):\n",
        "    image = image.convert('RGB')\n",
        "    image_features = ci.image_to_features(image)\n",
        "\n",
        "    top_mediums = ci.mediums.rank(image_features, 5)\n",
        "    top_artists = ci.artists.rank(image_features, 5)\n",
        "    top_movements = ci.movements.rank(image_features, 5)\n",
        "    top_trendings = ci.trendings.rank(image_features, 5)\n",
        "    top_flavors = ci.flavors.rank(image_features, 5)\n",
        "\n",
        "    medium_ranks = {medium: sim for medium, sim in zip(top_mediums, ci.similarities(image_features, top_mediums))}\n",
        "    artist_ranks = {artist: sim for artist, sim in zip(top_artists, ci.similarities(image_features, top_artists))}\n",
        "    movement_ranks = {movement: sim for movement, sim in zip(top_movements, ci.similarities(image_features, top_movements))}\n",
        "    trending_ranks = {trending: sim for trending, sim in zip(top_trendings, ci.similarities(image_features, top_trendings))}\n",
        "    flavor_ranks = {flavor: sim for flavor, sim in zip(top_flavors, ci.similarities(image_features, top_flavors))}\n",
        "    \n",
        "    return medium_ranks, artist_ranks, movement_ranks, trending_ranks, flavor_ranks\n",
        "\n",
        "def image_to_prompt(image, mode):\n",
        "    ci.config.chunk_size = 2048 if ci.config.clip_model_name == \"ViT-L-14/openai\" else 1024\n",
        "    ci.config.flavor_intermediate_count = 2048 if ci.config.clip_model_name == \"ViT-L-14/openai\" else 1024\n",
        "    image = image.convert('RGB')\n",
        "    if mode == 'best':\n",
        "        return ci.interrogate(image)\n",
        "    elif mode == 'classic':\n",
        "        return ci.interrogate_classic(image)\n",
        "    elif mode == 'fast':\n",
        "        return ci.interrogate_fast(image)\n",
        "    elif mode == 'negative':\n",
        "        return ci.interrogate_negative(image)\n",
        "        "
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 677
        },
        "id": "Pf6qkFG6MPRj",
        "outputId": "8d542b56-8be7-453d-bf27-d0540a774c7d"
      },
      "outputs": [],
      "source": [
        "#@title Image to prompt! 🖼️ -> 📝\n",
        "   \n",
        "def prompt_tab():\n",
        "    with gr.Column():\n",
        "        with gr.Row():\n",
        "            image = gr.Image(type='pil', label=\"Image\")\n",
        "            with gr.Column():\n",
        "                mode = gr.Radio(['best', 'fast', 'classic', 'negative'], label='Mode', value='best')\n",
        "        prompt = gr.Textbox(label=\"Prompt\")\n",
        "    button = gr.Button(\"Generate prompt\")\n",
        "    button.click(image_to_prompt, inputs=[image, mode], outputs=prompt)\n",
        "\n",
        "def analyze_tab():\n",
        "    with gr.Column():\n",
        "        with gr.Row():\n",
        "            image = gr.Image(type='pil', label=\"Image\")\n",
        "        with gr.Row():\n",
        "            medium = gr.Label(label=\"Medium\", num_top_classes=5)\n",
        "            artist = gr.Label(label=\"Artist\", num_top_classes=5)        \n",
        "            movement = gr.Label(label=\"Movement\", num_top_classes=5)\n",
        "            trending = gr.Label(label=\"Trending\", num_top_classes=5)\n",
        "            flavor = gr.Label(label=\"Flavor\", num_top_classes=5)\n",
        "    button = gr.Button(\"Analyze\")\n",
        "    button.click(image_analysis, inputs=image, outputs=[medium, artist, movement, trending, flavor])\n",
        "\n",
        "with gr.Blocks() as ui:\n",
        "    with gr.Tab(\"Prompt\"):\n",
        "        prompt_tab()\n",
        "    with gr.Tab(\"Analyze\"):\n",
        "        analyze_tab()\n",
        "\n",
        "ui.launch(show_api=False, debug=False)\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "cellView": "form",
        "id": "OGmvkzITN4Hz"
      },
      "outputs": [],
      "source": [
        "#@title Batch process a folder of images 📁 -> 📝\n",
        "\n",
        "#@markdown This will generate prompts for every image in a folder and either save results \n",
        "#@markdown to a desc.csv file in the same folder or rename the files to contain their prompts.\n",
        "#@markdown The renamed files work well for [DreamBooth extension](https://github.com/d8ahazard/sd_dreambooth_extension)\n",
        "#@markdown in the [Stable Diffusion Web UI](https://github.com/AUTOMATIC1111/stable-diffusion-webui).\n",
        "#@markdown You can use the generated csv in the [Stable Diffusion Finetuning](https://colab.research.google.com/drive/1vrh_MUSaAMaC5tsLWDxkFILKJ790Z4Bl?usp=sharing)\n",
        "\n",
        "import csv\n",
        "import os\n",
        "from IPython.display import clear_output, display\n",
        "from PIL import Image\n",
        "from tqdm import tqdm\n",
        "\n",
        "folder_path = \"/content/my_images\" #@param {type:\"string\"}\n",
        "prompt_mode = 'best' #@param [\"best\",\"fast\",\"classic\",\"negative\"]\n",
        "output_mode = 'rename' #@param [\"desc.csv\",\"rename\"]\n",
        "max_filename_len = 128 #@param {type:\"integer\"}\n",
        "\n",
        "\n",
        "def sanitize_for_filename(prompt: str, max_len: int) -> str:\n",
        "    name = \"\".join(c for c in prompt if (c.isalnum() or c in \",._-! \"))\n",
        "    name = name.strip()[:(max_len-4)] # extra space for extension\n",
        "    return name\n",
        "\n",
        "ci.config.quiet = True\n",
        "\n",
        "files = [f for f in os.listdir(folder_path) if f.endswith('.jpg') or f.endswith('.png')] if os.path.exists(folder_path) else []\n",
        "prompts = []\n",
        "for idx, file in enumerate(tqdm(files, desc='Generating prompts')):\n",
        "    if idx > 0 and idx % 100 == 0:\n",
        "        clear_output(wait=True)\n",
        "\n",
        "    image = Image.open(os.path.join(folder_path, file)).convert('RGB')\n",
        "    prompt = image_to_prompt(image, prompt_mode)\n",
        "    prompts.append(prompt)\n",
        "\n",
        "    print(prompt)\n",
        "    thumb = image.copy()\n",
        "    thumb.thumbnail([256, 256])\n",
        "    display(thumb)\n",
        "\n",
        "    if output_mode == 'rename':\n",
        "        name = sanitize_for_filename(prompt, max_filename_len)\n",
        "        ext = os.path.splitext(file)[1]\n",
        "        filename = name + ext\n",
        "        idx = 1\n",
        "        while os.path.exists(os.path.join(folder_path, filename)):\n",
        "            print(f'File {filename} already exists, trying {idx+1}...')\n",
        "            filename = f\"{name}_{idx}{ext}\"\n",
        "            idx += 1\n",
        "        os.rename(os.path.join(folder_path, file), os.path.join(folder_path, filename))\n",
        "\n",
        "if len(prompts):\n",
        "    if output_mode == 'desc.csv':\n",
        "        csv_path = os.path.join(folder_path, 'desc.csv')\n",
        "        with open(csv_path, 'w', encoding='utf-8', newline='') as f:\n",
        "            w = csv.writer(f, quoting=csv.QUOTE_MINIMAL)\n",
        "            w.writerow(['image', 'prompt'])\n",
        "            for file, prompt in zip(files, prompts):\n",
        "                w.writerow([file, prompt])\n",
        "\n",
        "        print(f\"\\n\\n\\n\\nGenerated {len(prompts)} prompts and saved to {csv_path}, enjoy!\")\n",
        "    else:\n",
        "        print(f\"\\n\\n\\n\\nGenerated {len(prompts)} prompts and renamed your files, enjoy!\")\n",
        "else:\n",
        "    print(f\"Sorry, I couldn't find any images in {folder_path}\")\n"
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "collapsed_sections": [],
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3.7.15 ('py37')",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.9.5"
    },
    "orig_nbformat": 4,
    "vscode": {
      "interpreter": {
        "hash": "1f51d5616d3bc2b87a82685314c5be1ec9a49b6e0cb1f707bfa2acb6c45f3e5f"
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
